import jieba


# 1. 导入文本
def getTxt(path: str) -> str:
    """Return the entire contents of the UTF-8 text file at *path*.

    Args:
        path: Location of the text file to read.

    Returns:
        The file's full contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed even if read() fails.
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()


# 2. 分词处理
def cut(source: str) -> list:
    """Tokenize *source* with jieba and drop a fixed set of stop words.

    Args:
        source: The text to segment.

    Returns:
        The tokens produced by ``jieba.lcut`` in their original order, with
        every occurrence of a stop word removed.
    """
    stop_words = {'却说', '二人', '不可', '不能', '如此', '如何'}
    # Build a new list rather than calling remove() while iterating: removing
    # from the list being looped over makes the loop skip the element after
    # each removal, so adjacent stop words would be left behind.
    return [word for word in jieba.lcut(source) if word not in stop_words]


# 3. 统计结果
def count(words: list) -> list:
    """Count multi-character words and merge known aliases.

    Tokens of length 1 are ignored. Counts recorded under an alias
    (e.g. '诸葛亮', '孔明曰') are added to the canonical name they map to and
    the alias entry is removed.

    Args:
        words: Tokens to count.

    Returns:
        A list of ``(word, frequency)`` tuples sorted by frequency in
        descending order; ties keep their first-seen order.
    """
    freq = {}
    for word in words:
        if len(word) > 1:
            freq[word] = freq.get(word, 0) + 1

    aliases = {'诸葛亮': '孔明', '孔明曰': '孔明', '玄德曰': '玄德'}
    for alias, canonical in aliases.items():
        # An alias (or its canonical name) may be absent from the input;
        # indexing directly would raise KeyError in that case.
        alias_count = freq.pop(alias, 0)
        if alias_count:
            freq[canonical] = freq.get(canonical, 0) + alias_count

    return sorted(freq.items(), key=lambda item: item[1], reverse=True)


# 4. 输出结果
def showResult(result: list, cnt: int = 10) -> None:
    """Print the first *cnt* entries of *result*, one per line.

    Args:
        result: Entries to print, in the order they should appear.
        cnt: Maximum number of entries to print. If *result* holds fewer
            entries, all of them are printed; a non-positive value prints
            nothing.
    """
    # Slicing tolerates a result shorter than cnt (indexing raised
    # IndexError); max() keeps a negative cnt meaning "print nothing"
    # instead of turning into a from-the-end slice.
    for entry in result[:max(cnt, 0)]:
        print(entry)


# 5. 主函数
def main(path: str = 'static/txt/三国.txt', cnt: int = 10) -> None:
    """Run the pipeline: read the text, tokenize, count, and print the top words.

    Args:
        path: Text file to analyse; defaults to the bundled novel text.
        cnt: Number of top entries to print.
    """
    text = getTxt(path)
    ranking = count(cut(text))
    showResult(ranking, cnt)


# 6. 调用主函数
# Only run the analysis when executed as a script, so importing this module
# (e.g. to reuse its functions) does not read the text file and print results.
if __name__ == "__main__":
    main()
